import scrapy

from scrapy_movice_42.items import ScrapyMovice42Item


class MoviceSpider(scrapy.Spider):
    """Crawl a movie listing page and pair each title with a detail-page image.

    ``parse`` reads the title text and detail link of every matching anchor
    on the listing page and schedules a request for the detail page.
    ``parse_second`` extracts the first image URL found inside the
    ``div#Zoom`` element of that page and yields a ``ScrapyMovice42Item``
    carrying both the title and the image URL.
    """

    name = "movice"
    allowed_domains = ["www.dytt8.com"]
    # Only the first listing page is crawled. Further listing pages follow
    # the pattern .../china/list_4_2.html, .../china/list_4_3.html, ...
    start_urls = ["https://www.dytt8.com/html/gndy/china/index.html"]

    def parse(self, response):
        """Schedule one detail-page request per movie link on the listing page.

        Args:
            response: The downloaded listing page.

        Yields:
            scrapy.Request: A request for each detail page, carrying the
            movie title in ``meta["name"]`` for ``parse_second``.
        """
        # We need the title from this page and the image from the detail page;
        # both the title text and the link live on the same <a> element.
        anchors = response.xpath('//body//div//table//tr[2]/td[2]/b/a[2]')

        for anchor in anchors:
            title = anchor.xpath('./text()').extract_first()
            href = anchor.xpath('./@href').extract_first()

            # extract_first() returns None when the attribute is absent;
            # without a link there is no detail page to visit, so skip the
            # entry instead of failing on string concatenation.
            if not href:
                continue

            # urljoin resolves root-relative, relative and absolute hrefs
            # against the URL of the current page.
            url = response.urljoin(href)

            # Request the detail page and hand the title over via meta.
            yield scrapy.Request(url=url, callback=self.parse_second, meta={"name": title})

    def parse_second(self, respoonse):
        """Build the item from a detail page.

        Args:
            respoonse: The downloaded detail page; its ``meta["name"]`` holds
                the title collected on the listing page.

        Yields:
            ScrapyMovice42Item: An item with ``src`` set to the first image
            URL inside ``div#Zoom`` (``None`` when no image matches) and
            ``name`` set to the movie title.
        """
        # If nothing is extracted here, check the XPath expression first.
        src = respoonse.xpath('//body//div[@id="Zoom"]//img/@src').extract_first()

        name = respoonse.meta['name']

        movie = ScrapyMovice42Item(src=src, name=name)
        yield movie
